From 4d722b0de9ea6b24f405e74eb41d3040104f9b6c Mon Sep 17 00:00:00 2001
From: "kaf24@scramble.cl.cam.ac.uk" <kaf24@scramble.cl.cam.ac.uk>
Date: Mon, 26 Jul 2004 15:09:53 +0000
Subject: [PATCH] bitkeeper revision 1.1108.17.1
 (41051ec1NERNxLF017rAWe7ljBk92w)

A better fix for blkdev request merging. Should work for both IDE and
SCSI, and do as much merging as possible, and also no need for PIO
fallback mode.
---
 .rootkeys                                     |   1 +
 linux-2.4.26-xen-sparse/include/asm-xen/pci.h |  25 +-
 .../include/linux/blkdev.h                    | 372 ++++++++++++++++++
 3 files changed, 376 insertions(+), 22 deletions(-)
 create mode 100644 linux-2.4.26-xen-sparse/include/linux/blkdev.h

diff --git a/.rootkeys b/.rootkeys
index 053bb5cede..39fcdfbc47 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -135,6 +135,7 @@
 3f689063nhrIRsMMZjZxMFk7iEINqQ linux-2.4.26-xen-sparse/include/asm-xen/xen_proc.h
 40659defgWA92arexpMGn8X3QMDj3w linux-2.4.26-xen-sparse/include/asm-xen/xor.h
 3f056927gMHl7mWB89rb73JahbhQIA linux-2.4.26-xen-sparse/include/linux/blk.h
+41051ec1m6bJVjZocTG0C0V0O6RsVg linux-2.4.26-xen-sparse/include/linux/blkdev.h
 401c0590D_kwJDU59X8NyvqSv_Cl2A linux-2.4.26-xen-sparse/include/linux/sched.h
 40a248afgI0_JKthdYAe8beVfXSTpQ linux-2.4.26-xen-sparse/include/linux/skbuff.h
 3e5a4e686V0nioX2ZpFf056sgvdiQw linux-2.4.26-xen-sparse/include/linux/sunrpc/debug.h
diff --git a/linux-2.4.26-xen-sparse/include/asm-xen/pci.h b/linux-2.4.26-xen-sparse/include/asm-xen/pci.h
index 382b7a41de..74ae5ba8b1 100644
--- a/linux-2.4.26-xen-sparse/include/asm-xen/pci.h
+++ b/linux-2.4.26-xen-sparse/include/asm-xen/pci.h
@@ -145,8 +145,7 @@ static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
 static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
 			     int nents, int direction)
 {
-	int i, j, nr_pfns;
-	unsigned long first_pfn;
+	int i;
 
 	if (direction == PCI_DMA_NONE)
 		out_of_line_bug();
@@ -160,28 +159,10 @@ static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
  		else if (!sg[i].address && !sg[i].page)
  			out_of_line_bug();
  
- 		if (sg[i].address) {
+ 		if (sg[i].address)
  			sg[i].dma_address = virt_to_bus(sg[i].address);
- 			first_pfn = virt_to_phys(sg[i].address) >> PAGE_SHIFT;
- 			nr_pfns = (((unsigned long)sg[i].address & 
- 			    (PAGE_SIZE-1)) + sg[i].length + PAGE_SIZE - 1) >>
- 			    PAGE_SHIFT;
- 		} else {
+ 		else
  			sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
- 			first_pfn = page_to_phys(sg[i].page) >> PAGE_SHIFT;
- 			nr_pfns = (sg[i].offset + sg[i].length + PAGE_SIZE - 
- 			    1) >> PAGE_SHIFT;
- 		}
-
-                /*
-                 * Check that we merged physical buffers are also contiguous
-                 * in machine-address space. We try to fail by returning 0.
-                 */
-                for (j = 1; j < nr_pfns; j++) {
-                    if ( unlikely(pfn_to_mfn(first_pfn+j) != 
-                                  (pfn_to_mfn(first_pfn)+j)) )
-                        return 0;
-                }
  	}
  
 	flush_write_buffers();
diff --git a/linux-2.4.26-xen-sparse/include/linux/blkdev.h b/linux-2.4.26-xen-sparse/include/linux/blkdev.h
new file mode 100644
index 0000000000..09178c4203
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/include/linux/blkdev.h
@@ -0,0 +1,372 @@
+#ifndef _LINUX_BLKDEV_H
+#define _LINUX_BLKDEV_H
+
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/genhd.h>
+#include <linux/tqueue.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+
+#include <asm/io.h>
+
+struct request_queue;
+typedef struct request_queue request_queue_t;
+struct elevator_s;
+typedef struct elevator_s elevator_t;
+
+/*
+ * Ok, this is an expanded form so that we can use the same
+ * request for paging requests.
+ */
+struct request {
+	struct list_head queue;
+	int elevator_sequence;
+
+	volatile int rq_status;	/* should split this into a few status bits */
+#define RQ_INACTIVE		(-1)
+#define RQ_ACTIVE		1
+#define RQ_SCSI_BUSY		0xffff
+#define RQ_SCSI_DONE		0xfffe
+#define RQ_SCSI_DISCONNECTING	0xffe0
+
+	kdev_t rq_dev;
+	int cmd;		/* READ or WRITE */
+	int errors;
+	unsigned long start_time;
+	unsigned long sector;
+	unsigned long nr_sectors;
+	unsigned long hard_sector, hard_nr_sectors;
+	unsigned int nr_segments;
+	unsigned int nr_hw_segments;
+	unsigned long current_nr_sectors, hard_cur_sectors;
+	void * special;
+	char * buffer;
+	struct completion * waiting;
+	struct buffer_head * bh;
+	struct buffer_head * bhtail;
+	request_queue_t *q;
+};
+
+#include <linux/elevator.h>
+
+typedef int (merge_request_fn) (request_queue_t *q, 
+				struct request  *req,
+				struct buffer_head *bh,
+				int);
+typedef int (merge_requests_fn) (request_queue_t *q, 
+				 struct request  *req,
+				 struct request  *req2,
+				 int);
+typedef void (request_fn_proc) (request_queue_t *q);
+typedef request_queue_t * (queue_proc) (kdev_t dev);
+typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh);
+typedef void (plug_device_fn) (request_queue_t *q, kdev_t device);
+typedef void (unplug_device_fn) (void *q);
+
+struct request_list {
+	unsigned int count;
+	unsigned int pending[2];
+	struct list_head free;
+};
+
+struct request_queue
+{
+	/*
+	 * the queue request freelist, one for reads and one for writes
+	 */
+	struct request_list	rq;
+
+	/*
+	 * The total number of requests on each queue
+	 */
+	int nr_requests;
+
+	/*
+	 * Batching threshold for sleep/wakeup decisions
+	 */
+	int batch_requests;
+
+	/*
+	 * The total number of 512byte blocks on each queue
+	 */
+	atomic_t nr_sectors;
+
+	/*
+	 * Batching threshold for sleep/wakeup decisions
+	 */
+	int batch_sectors;
+
+	/*
+	 * The max number of 512byte blocks on each queue
+	 */
+	int max_queue_sectors;
+
+	/*
+	 * Together with queue_head for cacheline sharing
+	 */
+	struct list_head	queue_head;
+	elevator_t		elevator;
+
+	request_fn_proc		* request_fn;
+	merge_request_fn	* back_merge_fn;
+	merge_request_fn	* front_merge_fn;
+	merge_requests_fn	* merge_requests_fn;
+	make_request_fn		* make_request_fn;
+	plug_device_fn		* plug_device_fn;
+	/*
+	 * The queue owner gets to use this for whatever they like.
+	 * ll_rw_blk doesn't touch it.
+	 */
+	void			* queuedata;
+
+	/*
+	 * This is used to remove the plug when tq_disk runs.
+	 */
+	struct tq_struct	plug_tq;
+
+	/*
+	 * Boolean that indicates whether this queue is plugged or not.
+	 */
+	int			plugged:1;
+
+	/*
+	 * Boolean that indicates whether current_request is active or
+	 * not.
+	 */
+	int			head_active:1;
+
+	/*
+	 * Boolean that indicates you will use blk_started_sectors
+	 * and blk_finished_sectors in addition to blk_started_io
+	 * and blk_finished_io.  It enables the throttling code to 
+	 * help keep the sectors in flight to a reasonable value
+	 */
+	int			can_throttle:1;
+
+	unsigned long		bounce_pfn;
+
+	/*
+	 * Is meant to protect the queue in the future instead of
+	 * io_request_lock
+	 */
+	spinlock_t		queue_lock;
+
+	/*
+	 * Tasks wait here for free read and write requests
+	 */
+	wait_queue_head_t	wait_for_requests;
+};
+
+#define blk_queue_plugged(q)	(q)->plugged
+#define blk_fs_request(rq)	((rq)->cmd == READ || (rq)->cmd == WRITE)
+#define blk_queue_empty(q)	list_empty(&(q)->queue_head)
+
+extern inline int rq_data_dir(struct request *rq)
+{
+	if (rq->cmd == READ)
+		return READ;
+	else if (rq->cmd == WRITE)
+		return WRITE;
+	else {
+		BUG();
+		return -1; /* ahem */
+	}
+}
+
+extern unsigned long blk_max_low_pfn, blk_max_pfn;
+
+#define BLK_BOUNCE_HIGH		((u64)blk_max_low_pfn << PAGE_SHIFT)
+#define BLK_BOUNCE_ANY		((u64)blk_max_pfn << PAGE_SHIFT)
+
+extern void blk_queue_bounce_limit(request_queue_t *, u64);
+
+#ifdef CONFIG_HIGHMEM
+extern struct buffer_head *create_bounce(int, struct buffer_head *);
+extern inline struct buffer_head *blk_queue_bounce(request_queue_t *q, int rw,
+						   struct buffer_head *bh)
+{
+	struct page *page = bh->b_page;
+
+#ifndef CONFIG_DISCONTIGMEM
+	if (page - mem_map <= q->bounce_pfn)
+#else
+	if ((page - page_zone(page)->zone_mem_map) + (page_zone(page)->zone_start_paddr >> PAGE_SHIFT) <= q->bounce_pfn)
+#endif
+		return bh;
+
+	return create_bounce(rw, bh);
+}
+#else
+#define blk_queue_bounce(q, rw, bh)	(bh)
+#endif
+
+#ifdef CONFIG_XEN
+/* Used for buffer merging, where it is imperative we use machine addresses! */
+#define bh_phys(bh)		(page_to_bus((bh)->b_page) + bh_offset((bh)))
+#else
+#define bh_phys(bh)		(page_to_phys((bh)->b_page) + bh_offset((bh)))
+#endif
+
+#define BH_CONTIG(b1, b2)	(bh_phys((b1)) + (b1)->b_size == bh_phys((b2)))
+#define BH_PHYS_4G(b1, b2)	((bh_phys((b1)) | 0xffffffff) == ((bh_phys((b2)) + (b2)->b_size - 1) | 0xffffffff))
+
+struct blk_dev_struct {
+	/*
+	 * queue_proc has to be atomic
+	 */
+	request_queue_t		request_queue;
+	queue_proc		*queue;
+	void			*data;
+};
+
+struct sec_size {
+	unsigned block_size;
+	unsigned block_size_bits;
+};
+
+/*
+ * Used to indicate the default queue for drivers that don't bother
+ * to implement multiple queues.  We have this access macro here
+ * so as to eliminate the need for each and every block device
+ * driver to know about the internal structure of blk_dev[].
+ */
+#define BLK_DEFAULT_QUEUE(_MAJOR)  &blk_dev[_MAJOR].request_queue
+
+extern struct sec_size * blk_sec[MAX_BLKDEV];
+extern struct blk_dev_struct blk_dev[MAX_BLKDEV];
+extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size);
+extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size);
+extern void generic_make_request(int rw, struct buffer_head * bh);
+extern inline request_queue_t *blk_get_queue(kdev_t dev);
+extern void blkdev_release_request(struct request *);
+
+/*
+ * Access functions for manipulating queue properties
+ */
+extern int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors);
+extern void blk_init_queue(request_queue_t *, request_fn_proc *);
+extern void blk_cleanup_queue(request_queue_t *);
+extern void blk_queue_headactive(request_queue_t *, int);
+extern void blk_queue_throttle_sectors(request_queue_t *, int);
+extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
+extern void generic_unplug_device(void *);
+extern inline int blk_seg_merge_ok(struct buffer_head *, struct buffer_head *);
+
+extern int * blk_size[MAX_BLKDEV];
+
+extern int * blksize_size[MAX_BLKDEV];
+
+extern int * hardsect_size[MAX_BLKDEV];
+
+extern int * max_readahead[MAX_BLKDEV];
+
+extern int * max_sectors[MAX_BLKDEV];
+
+extern int * max_segments[MAX_BLKDEV];
+
+#define MAX_SEGMENTS 128
+#define MAX_SECTORS 255
+#define MAX_QUEUE_SECTORS (4 << (20 - 9)) /* 4 mbytes when full sized */
+#define MAX_NR_REQUESTS 1024 /* 1024k when in 512 units, normally min is 1M in 1k units */
+
+#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
+
+#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue)
+#define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next)
+#define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev)
+#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next)
+#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev)
+
+extern void drive_stat_acct (kdev_t dev, int rw,
+					unsigned long nr_sectors, int new_io);
+
+static inline int get_hardsect_size(kdev_t dev)
+{
+	int retval = 512;
+	int major = MAJOR(dev);
+
+	if (hardsect_size[major]) {
+		int minor = MINOR(dev);
+		if (hardsect_size[major][minor])
+			retval = hardsect_size[major][minor];
+	}
+	return retval;
+}
+
+static inline int blk_oversized_queue(request_queue_t * q)
+{
+	if (q->can_throttle)
+		return atomic_read(&q->nr_sectors) > q->max_queue_sectors;
+	return q->rq.count == 0;
+}
+
+static inline int blk_oversized_queue_reads(request_queue_t * q)
+{
+	if (q->can_throttle)
+		return atomic_read(&q->nr_sectors) > q->max_queue_sectors + q->batch_sectors;
+	return q->rq.count == 0;
+}
+
+static inline int blk_oversized_queue_batch(request_queue_t * q)
+{
+	return atomic_read(&q->nr_sectors) > q->max_queue_sectors - q->batch_sectors;
+}
+
+#define blk_finished_io(nsects)	do { } while (0)
+#define blk_started_io(nsects)	do { } while (0)
+
+static inline void blk_started_sectors(struct request *rq, int count)
+{
+	request_queue_t *q = rq->q;
+	if (q && q->can_throttle) {
+		atomic_add(count, &q->nr_sectors);
+		if (atomic_read(&q->nr_sectors) < 0) {
+			printk("nr_sectors is %d\n", atomic_read(&q->nr_sectors));
+			BUG();
+		}
+	}
+}
+
+static inline void blk_finished_sectors(struct request *rq, int count)
+{
+	request_queue_t *q = rq->q;
+	if (q && q->can_throttle) {
+		atomic_sub(count, &q->nr_sectors);
+		
+		smp_mb();
+		if (q->rq.count >= q->batch_requests && !blk_oversized_queue_batch(q)) {
+			if (waitqueue_active(&q->wait_for_requests))
+				wake_up(&q->wait_for_requests);
+		}
+		if (atomic_read(&q->nr_sectors) < 0) {
+			printk("nr_sectors is %d\n", atomic_read(&q->nr_sectors));
+			BUG();
+		}
+	}
+}
+
+static inline unsigned int blksize_bits(unsigned int size)
+{
+	unsigned int bits = 8;
+	do {
+		bits++;
+		size >>= 1;
+	} while (size > 256);
+	return bits;
+}
+
+static inline unsigned int block_size(kdev_t dev)
+{
+	int retval = BLOCK_SIZE;
+	int major = MAJOR(dev);
+
+	if (blksize_size[major]) {
+		int minor = MINOR(dev);
+		if (blksize_size[major][minor])
+			retval = blksize_size[major][minor];
+	}
+	return retval;
+}
+
+#endif
-- 
2.30.2